#############################################################################################
# Replication Workshop 2103-2104                                                                      #
# Cambridge University, Social Sciences' Research Methods Centre (SSRMC) Training Programme #
# Lecturer: Nicole Janz www.nicolejanz.de/teaching                                                    #
#############################################################################################

##last updated on 05/02/2014 ##


#install packages!
install.packages("psych")
install.packages("data.table")
install.packages("ppcor")
install.packages("gmodels")
install.packages("vcd")
library(vcd)
#load packages
require(psych)

install.packages("vcd")
library(vcd)
assocstats(data)

########################  Script and Console  ################################

# Always step 1: Clear workspace
rm(list=ls()) #this removes all data from previous runs of R



########################  Your working directory  ################################

# Your "working directory" is the "folder" where R loads and saves data
getwd()
judgment <- read.csv("judgment.csv")
View(judgment)


####################  Get a 'feel' for the data  ####################



head(judgment)         # first few observations, top of dataset
dim(judgment) # the dimensions of the dataset (rows by columns)
colnames(judgment)    # column (generally variable) names
summary(judgment)      # summary statistics

##############################################################################
#Descriptives
##############################################################################

#take a look at variable names
names(judgment)

#look at histograms of your data
hist(judgment$CoinFlip)
hist(judgment$FreqCategory)

describe(judgment$CoinFlip)
describe(judgment$FreqCategory)

table(judgment$CoinFlip) ## Says 48 participants in Condition 1 and 38 in Condition 2. According to the paper, it has to be 31 and 32)
table(judgment$FreqCategory) ## Says 27, 27, 27. However, it has to be 23, 7, 33...I do not understand how 27 participants in the thrid group got 33 in the paper. 

#********Problems************************************#
# Thee are 81 participants in my dataset while the authors used 63 participants in their study. 
# Therefore, it seems to me that some participants were eleminated. I'll follow the authors' elemination process first and see if I can reach down to 63 participants.

#********************************************#
# Let's clean the dataset #

temp1 <- judgment
View(temp1) # From now I will leave Judgement file as it is and use temp1.
names(temp1)[names(temp1) == "CaseNr"]  <-  "ID" 

View(temp1)
head(temp1)  

## Make a new vaiable called "invalid" 
### I decided to give 1 on "invalid" for those who need to be eleminated at the end of this process. 
temp1$invalid <- NA
## step 1 MCheck1c ## Eleminate participants according to authors' logic 
temp1[temp1$MCheck1c=="0" & !is.na(temp1$MCheck1c), ]$invalid <- 1
temp1[temp1$MCheck2c=="0" & !is.na(temp1$MCheck2c), ]$invalid <- 1
temp1[temp1$MCheck3c=="0" & !is.na(temp1$MCheck3c), ]$invalid <- 1
View(temp1)

## step 2 ## Further elemination based on my understanding: Eleminate participants who answered incorrectly.
#No 54, No 72
##temp1= temp1[-c(54,72),]
temp1[temp1$ID=="22", ]$invalid <- 1
temp1[temp1$ID=="62" , ]$invalid <- 1
View(temp1)

# step 3 ## other potentially excluded participants # This step didnt' really help. So I decided not eleminate them. 
##temp1[temp1$MCheck4=="2,62" & !is.na(temp1$MCheck4), ]$invalid <- 1
##temp1[temp1$MCheck4=="1,04" & !is.na(temp1$MCheck4), ]$invalid <- 1
##temp1[temp1$MCheck4=="0,50" & !is.na(temp1$MCheck4), ]$invalid <- 1
##temp1[temp1$MCheck4=="1,00" & !is.na(temp1$MCheck4), ]$invalid <- 1
##temp1[temp1$MCheck4=="1,60" & !is.na(temp1$MCheck4), ]$invalid <- 1

##temp1[temp1$MCheck5=="1,75" & !is.na(temp1$MCheck5), ]$invalid <- 1
##temp1[temp1$MCheck5=="1,60" & !is.na(temp1$MCheck5), ]$invalid <- 1
##temp1[temp1$MCheck5=="1,5" & !is.na(temp1$MCheck5), ]$invalid <- 1
##temp1[temp1$MCheck5=="0/2" & !is.na(temp1$MCheck5), ]$invalid <- 1
##temp1[temp1$MCheck5=="Feb-00" & !is.na(temp1$MCheck5), ]$invalid <- 1 ## Attention## 
##temp1[temp1$MCheck5=="1,5" & !is.na(temp1$MCheck5), ]$invalid <- 1

#Final data ## I eleminated those who score 1 on invalid: six participants are now eleminated. 
temp1= temp1[-c(1,2,3,4, 54,72),]
View(temp1)

# Solutions? #
## I have emailed the main author twice as there seems to be a problem with the dataset he provided. 
## However, I have not received the author's response. (05/02/2014)

##############################################################################
#Replication of Figure 1 with my dataset
##############################################################################  
              
        temp1$CoinF <-NA
        temp1$CoinF[temp1$CoinFlip==1]="Flowers"
        temp1$CoinF[temp1$CoinFlip==2]="Birds"
        counts = table(temp1$FreqCategory, temp1$CoinF)
counts
counts2 = table(temp1$CoinF, temp1$FreqCategory)
counts2
#result(counts)    
#                 Birds Flowers
#-1 (more Birds)     13      11*
#0                   12      13
#1 (more flowers)    10*     16
# * <- marks the target groups 

        counts=prop.table(counts,2)
        counts=prop.table(counts,2)*100
        barplot(counts, main="Figure 1", xlab="Endowed Category", ylab="Percentage", legend = rownames(counts))
        barplot(counts, main="Figure 1", xlab="Endowed Category", ylab="Percentage", legend.text=c("FewerFlowers","Equal","FewerBirds"))
        #barplot(counts, main="Figure 1", xlab="Endowed Category", ylab="Percentage")

## Now I have successfuly created Figure 1 with my dataset. However the numbers are different from the original paper.  
## Therefore, I decided to use the data on the paper and replicate Figure 1 and compare the two.         
     
##############################################################################
#Replication of Figure 1 with the original dataset
##############################################################################
# Although my dataset seems to have more participats than in the paper, I decided to create Figure 1 with the results that appear on the paper. 


## Step 1: Create our contingency table, name it "T"

flowers <-  c(15,8)
equal <- c(4,3)
birds <- c(12, 21)
T <- rbind(flowers, equal, birds)
# Name rows and columns
colnames(T) <- c("Flowers","Birds")
rownames(T) <- c("Fewer Flowers","Equal", "Fewer Birds")
T

## Step 2: Create Figure 1
counts_T=prop.table(T,2)
counts_T=prop.table(T,2)*100
barplot(counts, main="Figure 1 Original", xlab="Endowed Category", ylab="Percentage", legend = rownames(counts_T))
barplot(counts, main="Figure 1 Original", xlab="Endowed Category", ylab="Percentage", legend.text=c("FewerFlowers","Equal","FewerBirds"))

        ##############################################################################
        # Chisq.test 
        ##############################################################################

# Chisq.test with the original paper 

## Step 1: Create our contingency table, name it "t" 
flowers <-  c(15,8)  
birds <- c(12, 21)
t<- rbind(flowers, birds)
t
# Name rows and columns
colnames(t) <- c("Flowers","Birds")
rownames(t) <- c("Fewer Flowers", "Fewer Birds")
t       

##Step 2: Chisq.test
chisq.test(t,correct=FALSE, rescale.p= TRUE) #X-squared = 4.5193, df = 1, p-value = 0.03351
chisq.test(t,correct=FALSE, simulate.p.value = TRUE)
## Original paper: X-squared = 4.51, p-value = 0.037, Prep =.93 

## Chi-square test with my datset ## 
counts2

Flowers <-  c(11,13)  
Birds <- c(16, 10)
counts_t<- rbind(Flowers, Birds)
# Name rows and columns
colnames(counts_t) <- c("Flowers","Birds")
rownames(counts_t) <- c("Fewer Flowers", "Fewer Birds")
counts_t
counts
chisq.test(counts_t)   #X-squared = 0.6876, df = 1, p-value = 0.407

#chisq.test(temp1$CoinFlip,temp1$FreqRatio) #47.3194, df = 1, p-value = 6.031e-12
## Interpretation: My data does not suppor the author's claim. 

##########################################################################
#Prep value # Original paper: .93
##########################################################################
### what's Prep? 
#  http://en.wikipedia.org/wiki/P-rep : p-rep computes the probability of replicating an effect.
p <- .037 # I used .037 because this is the value that the paper provides.
prep= (1 + (p/ (1- p))^(2/3) )^(-1)        
prep   #Prep = 0.8977763 # Prep in the original paper = .93
      #Interpretation: The authors might use a different equation. 

View(temp1)
#with my data
P <- .407
prep= (1 + (p/ (1- p))^(2/3) )^(-1)  
prep

#Interpretation: The Prep for the original paper and for mine came up with the same result: .89 
#This seesm to suggest the possibility of Type 1 erer in the original paper. 
##########################################################################
#Odds ratio # Original paper: 3.28
##########################################################################

#the odds for Birds-seekers find saying they saw fewer birds 

result_Birds <- (21/8) / (12/15)
result_Birds # 3.28125
##Expected explanation: This tells me that bird seekers have 3.28 times greater odds of saying there were fewer birds 
##then flower seekers saying there were fewer birds. 

##########################################################################
#Pearson's R # Original paper: (Pearson's r=-.047, N=63, p=.712)
##########################################################################
###MCheck7: “The type of picture for which I received money was randomly decided.”
###FreqRatio: FreqFlower - FreqBird (neg = more bird, pos = more flower)

cor(temp1$MCheck7, temp1$FreqRatio) #: 0.1801392 df = 79
cor.test(temp1$MCheck7, temp1$FreqRatio) #p-value =  0.1076
cor(temp1$MCheck7, temp1$FreqCategory) #0.1323565
cor.test(temp1$MCheck7, temp1$FreqCategory) #p-value = 0.2388 df = 79
# WHAT CAN YOU SAY ABOUT THIS RELATIONSHIP? Is p < .05? No
# Compare with the paper: (Pearson's r=-.047, N=63, p=.712)
## try something new 
cor.test(temp1$MCheck6, temp1$FreqCategory) #r= -0.1350474, p-value = 0.248
cor.test(temp1$MCheck6, temp1$FreqRatio) #r =.-0.01073329 ,  p-value = 0.9272

# R-squared
cor(temp1$MCheck7, temp1$FreqRatio)^2 #0.03245012
# The two items are sharing roughly 32.5 % of their variance.

# Pearson's r with significance test 
cor.test(temp1$MCheck7, temp1$FreqRatio,method="pearson") #same result 

######## End ############


#Try something new 1#
## I found Figure 1 in the paper can be misleading/confusing. So I decided to present the result in a differnt way. 
T
flowers <-  c(15,21)
equal <- c(4,3)
birds <- c(12, 8)
t2 <- rbind(flowers, equal, birds)
# Name rows and columns
colnames(t2) <- c("Flowers","Birds")
rownames(t2) <- c("fewer targets","equal", "more targets")
t2
#percentages 
#Flowers  Birds
#fewer targets 48.38710 65.625
#Equal        12.90323  9.375
#more targets  38.70968 25.000

t2=prop.table(t2,2)
t2=prop.table(t2,2)*100
barplot(t2, main="Figure 1", xlab="Endowed Category", ylab="Percentage", legend = rownames(t2))
barplot(t2, main="Figure 1", xlab="Endowed Category", ylab="Percentage", legend.text=c("Fewer Targets","Equal","More Targets"))
barplot(t2, main="Figure 1", xlab="Endowed Category", ylab="Percentage") #without legend 


## Try something new 2# 
# 1. t-test between flower groups vs birds #t = 1.1103, df = 71.787, p-value = 0.2706
# Independent t-test: t.test(outcome~grouping variable)
res.ind <- t.test(temp1$FreqCategory~temp1$CoinFlip,paired=FALSE) #paired=FALSE is used for independent t-test
res.ind
# WHAT DOES THIS RESULT TELL US? No group difference between the two. Two groups are not much different. 

## Try something new 3 # Include "equal groups" into the chi-square test
## Why? The authors compared "fewer birds" vs "fewer flowers" and left "equal." 
## I decided to include this equal group into statistical analysis based on their hypothesis and tested the group differece again.
Targets <- c(15, 21)
Equal_XTargets <- c(16, 11)
t3 <- rbind(Targets, Equal_XTargets)
colnames(t3) <- c("Flowers","Birds")
rownames(t3) <- c("fewer targets","Equal/more targets")
t3

# get chi-square test

chisq.test(t3,correct=TRUE) #X-squared = 1.2715, df = 1, p-value = 0.2595
## Compare with the original paper: X-squared = 4.51, p-value = 0.037, Prep =.93 

#Interpretation: When "equal" group was added, the result no longer support authors' claim. 
## This could be the reason why they excluded "eqaul" group without explanation.
